home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Space & Astronomy
/
Space and Astronomy (October 1993).iso
/
mac
/
VIEWERS
/
MSDOS
/
UNZ50P1.ZIP
/
MATCH.C
< prev
next >
Wrap
C/C++ Source or Header
|
1992-07-10
|
19KB
|
564 lines
/*---------------------------------------------------------------------------
match.c
The match() routine recursively compares a string to a "pattern" (regular
expression), returning TRUE if a match is found or FALSE if not. This
version is specifically for use with unzip.c: as did the previous match()
from SEA, it leaves the case (upper, lower, or mixed) of the string alone,
but converts any uppercase characters in the pattern to lowercase if indi-
cated by the global var pInfo->lcflag (which is to say, string is assumed
to have been converted to lowercase already, if such was necessary).
---------------------------------------------------------------------------*/
#ifdef ZIPINFO
# undef ZIPINFO /* make certain there is only one version of match.o */
#endif /* ZIPINFO */
#include "unzip.h"
/* Forward declarations of the file-local matching helpers.  The __(( ))
 * wrapper is not defined in this file -- presumably a prototype-hiding
 * macro supplied by unzip.h for pre-ANSI compilers (TODO confirm). */
static int matche __((register char *p, register char *t));
static int matche_after_star __((register char *p, register char *t));
/* #include "filmatch.h": */
#ifndef BOOLEAN
# define BOOLEAN short int /* v1.2 made it short */
#endif
/* match defines: result codes returned by matche() and matche_after_star() */
#define MATCH_PATTERN 6 /* bad pattern */
#define MATCH_LITERAL 5 /* match failure on literal match */
#define MATCH_RANGE 4 /* match failure on [..] construct */
#define MATCH_ABORT 3 /* premature end of text string */
#define MATCH_END 2 /* premature end of pattern string */
#define MATCH_VALID 1 /* valid match */
/* pattern defines: codes stored through error_type by is_valid_pattern() */
#define PATTERN_VALID 0 /* valid pattern */
#define PATTERN_ESC -1 /* literal escape at end of pattern */
#define PATTERN_RANGE -2 /* malformed range in [..] construct */
#define PATTERN_CLOSE -3 /* no end bracket in [..] construct */
#define PATTERN_EMPTY -4 /* [..] construct is empty */
/*----------------------------------------------------------------------------
*
* Match the pattern PATTERN against the string TEXT;
*
* match() returns TRUE if pattern matches, FALSE otherwise.
* matche() returns MATCH_VALID if pattern matches, or an errorcode
* as follows otherwise:
*
* MATCH_PATTERN - bad pattern
* MATCH_RANGE - match failure on [..] construct
* MATCH_ABORT - premature end of text string
* MATCH_END - premature end of pattern string
* MATCH_VALID - valid match
*
*
* A match means the entire string TEXT is used up in matching.
*
* In the pattern string:
* `*' matches any sequence of characters (zero or more)
* `?' matches any character
* [SET] matches any character in the specified set,
* [!SET] or [^SET] matches any character not in the specified set.
*
* A set is composed of characters or ranges; a range looks like
* character hyphen character (as in 0-9 or A-Z). [0-9a-zA-Z_] is the
* minimal set of characters allowed in the [..] pattern construct.
* Other characters are allowed (ie. 8 bit characters) if your system
* will support them.
*
* To suppress the special syntactic significance of any of `[]*?!^-\',
* in a [..] construct and match the character exactly, precede it
* with a `\'.
*
----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
*
* matche() -- match the pattern P against the string T.
*
* Returns MATCH_VALID if the pattern matches, or one of the following
* codes otherwise:
*
* MATCH_PATTERN - bad pattern (malformed [..] construct)
* MATCH_LITERAL - match failure on a literal character
* MATCH_RANGE - match failure on [..] construct
* MATCH_ABORT - premature end of text string
* MATCH_END - premature end of pattern string (text left over)
*
* A match means the entire string T is used up in matching.
*
* In the pattern string:
* `*' matches any sequence of characters (zero or more)
* `?' matches any single character
* [SET] matches any character in the specified set,
* [!SET] or [^SET] matches any character not in the specified set.
* \ is allowed within a set to escape a character like ']' or '-'
*
* A set is composed of characters or ranges; a range looks like
* character hyphen character (as in 0-9 or A-Z). A range whose end
* points are given in descending order is accepted as well.
*
* Unless OLDSTUFF is defined, an uppercase literal pattern character is
* folded to lowercase before comparison when pInfo->lcflag is set. The
* text is never folded, and characters inside a [..] set are compared
* as written.
*
----------------------------------------------------------------------------*/
static int matche(p, t)
    register char *p;
    register char *t;
{
    register char range_start, range_end;  /* start and end in range */
    BOOLEAN invert;             /* is this [..] or [!..] */
    BOOLEAN member_match;       /* have I matched the [..] construct? */
    BOOLEAN loop;               /* should I terminate? */

    for (; *p; p++, t++) {

        /* Text is exhausted but pattern is not: the only way to still
         * match is for the rest of the pattern to consist solely of '*'
         * (each of which may match the empty string). */
        if (!*t) {
            while (*p == '*')
                p++;
            return (*p == '\0') ? MATCH_VALID : MATCH_ABORT;
        }

        /* determine and react to pattern type */
        switch (*p) {

            /* single any character match */
            case '?':
                break;

            /* multiple any character match */
            case '*':
                return matche_after_star (p, t);

            /* [..] construct, single member/exclusion character match */
            case '[': {
                /* move to beginning of range */
                p++;

                /* check if this is a member match or exclusion match */
                invert = FALSE;
                if ((*p == '!') || (*p == '^')) {
                    invert = TRUE;
                    p++;
                }

                /* an immediately closed set ([] or [!]) is malformed */
                if (*p == ']')
                    return MATCH_PATTERN;

                member_match = FALSE;
                loop = TRUE;

                while (loop) {

                    /* if end of construct then loop is done */
                    if (*p == ']') {
                        loop = FALSE;
                        continue;
                    }

                    /* a '\' makes the following character a literal
                     * set member ('!', '^', '-', '\' or ']') */
                    if (*p == '\\')
                        range_start = range_end = *++p;
                    else
                        range_start = range_end = *p;

                    /* if end of pattern then bad pattern (Missing ']') */
                    if (!*p)
                        return MATCH_PATTERN;

                    /* check for range bar */
                    if (*++p == '-') {

                        /* get the range end */
                        range_end = *++p;

                        /* if end of pattern or construct then bad pattern */
                        if ((range_end == '\0') || (range_end == ']'))
                            return MATCH_PATTERN;

                        /* special character range end */
                        if (range_end == '\\') {
                            range_end = *++p;

                            /* escape at end of pattern is a bad pattern */
                            if (!range_end)
                                return MATCH_PATTERN;
                        }

                        /* move just beyond this range */
                        p++;
                    }

                    /* if the text character is in range then match found.
                     * the end points may be given in either order, so
                     * compare against whichever is the lower bound.
                     */
                    if (range_start < range_end) {
                        if ((*t >= range_start) && (*t <= range_end)) {
                            member_match = TRUE;
                            loop = FALSE;
                        }
                    } else {
                        if ((*t >= range_end) && (*t <= range_start)) {
                            member_match = TRUE;
                            loop = FALSE;
                        }
                    }
                }

                /* if there was a match in an exclusion set then no match */
                /* if there was no match in a member set then no match */
                if ((invert && member_match) ||
                    !(invert || member_match))
                    return MATCH_RANGE;

                /* a member set that matched stopped scanning early: skip
                 * the rest of the [...] construct so p ends on its ']' */
                if (member_match) {
                    while (*p != ']') {

                        /* bad pattern (Missing ']') */
                        if (!*p)
                            return MATCH_PATTERN;

                        /* skip exact match */
                        if (*p == '\\') {
                            p++;

                            /* escape at end of pattern is a bad pattern */
                            if (!*p)
                                return MATCH_PATTERN;
                        }

                        /* move to next pattern char */
                        p++;
                    }
                }
                break;
            }   /* case '[' */

            /* must match this character exactly */
            default:
#ifdef OLDSTUFF
                if (*p != *t)
#else /* !OLDSTUFF */
                /* do it like arcmatch() (old unzip) did it (v1.2).  The
                 * <ctype.h> functions require a value representable as
                 * unsigned char, so plain char is converted first. */
                if (*t != (char) ((pInfo->lcflag &&
                                   isupper((unsigned char)(*p))) ?
                                  tolower((unsigned char)(*p)) : *p))
#endif /* ?OLDSTUFF */
                    return MATCH_LITERAL;
        }   /* switch */
    }   /* for */

    /* if end of text not reached then the pattern fails */
    if (*t)
        return MATCH_END;
    else
        return MATCH_VALID;
}
/*----------------------------------------------------------------------------
*
* matche_after_star() -- handle a `*' in the pattern.
*
* P points at a `*' (possibly followed by more `*' and `?'), T at the
* current text position. Each `?' in that run consumes one text
* character; the remaining pattern is then tried with matche() at every
* following text position until a definite result is obtained.
*
* Returns MATCH_VALID on a match, MATCH_ABORT when the text runs out,
* or MATCH_PATTERN when matche() reports a malformed pattern.
*
----------------------------------------------------------------------------*/
static int matche_after_star (p,t)
    register char *p;
    register char *t;
{
    register int match = 0;

    /* pass over existing ? and * in pattern */
    while ((*p == '?') || (*p == '*')) {

        /* take one char for each ?; if end of text then no match */
        if ((*p == '?') && (!*t++))
            return MATCH_ABORT;

        /* move to next char in pattern */
        p++;
    }

    /* if end of pattern we have matched regardless of text left */
    if (!*p)
        return MATCH_VALID;

    /* Continue until we run out of text or definite result seen */
    do {
        /* Try the rest of the pattern at this text position.  No
         * first-character pre-check is made here: matche() may fold an
         * uppercase pattern character before comparing, so a raw
         * comparison could wrongly skip a position that matches.
         * matche() itself fails at once on a first-character mismatch.
         */
        match = matche(p, t);

        /* if the end of text is reached then no match */
        if (!*t++)
            match = MATCH_ABORT;

    } while ((match != MATCH_VALID) &&
             (match != MATCH_ABORT) &&
             (match != MATCH_PATTERN));

    /* return result */
    return match;
}
/*----------------------------------------------------------------------------
*
* match() wraps matche() and reduces its result code to a truth value:
* TRUE when STRING matches PATTERN, FALSE for every other outcome.
* Note the argument order: the string comes first, the pattern second.
*
----------------------------------------------------------------------------*/
int match(string,pattern)
    char *string;
    char *pattern;
{
    /* matche() takes the pattern first, then the text */
    if (matche(pattern, string) == MATCH_VALID)
        return TRUE;

    return FALSE;
}
#ifdef TEST_MATCH
/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN has any special wildcard characters
* (`?', `*' or `[').
*
----------------------------------------------------------------------------*/
BOOLEAN is_pattern (char *pattern);
/*----------------------------------------------------------------------------
*
* Return TRUE if PATTERN is a well formed regular expression according
* to the above syntax
*
* error_type is a return code based on the type of pattern error. Zero is
* returned in error_type if the pattern is a valid one. error_type return
* values are as follows:
*
* PATTERN_VALID - pattern is well formed
* PATTERN_ESC - literal escape at end of pattern
* PATTERN_RANGE - [..] construct has no end of range in a '-' pair (ie [a-])
* PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
* PATTERN_EMPTY - [..] construct is empty (ie [])
*
----------------------------------------------------------------------------*/
BOOLEAN is_valid_pattern (char *pattern, int *error_type);
/* NOTE(review): no definition or caller of fast_match_after_star() is
 * visible in this file; presumably a leftover declaration -- confirm
 * before relying on it. */
int fast_match_after_star (register char *pattern, register char *text);
/*----------------------------------------------------------------------------
*
* is_pattern() scans P and reports whether it contains at least one of
* the wildcard introducers `?', `*' or `['. Returns TRUE at the first
* one found, FALSE if the end of the string is reached without any.
*
----------------------------------------------------------------------------*/
BOOLEAN is_pattern (char *p)
{
    for (; *p != '\0'; p++) {
        if ((*p == '?') || (*p == '*') || (*p == '['))
            return TRUE;
    }

    return FALSE;
}
/*----------------------------------------------------------------------------
*
* Return TRUE if pattern P is a well formed expression according to the
* syntax accepted by matche(), FALSE otherwise.
*
* *error_type receives a code describing the result:
*
* PATTERN_VALID - pattern is well formed
* PATTERN_ESC - literal escape (`\') at end of pattern
* PATTERN_RANGE - [..] construct has no end of range in a '-' pair (ie [a-])
* PATTERN_CLOSE - [..] construct has no end bracket (ie [abc-g )
* PATTERN_EMPTY - [..] construct is empty (ie [] or [!])
*
* Only [..] constructs can be malformed; every other character,
* including `*' and `?', is always acceptable.
*
----------------------------------------------------------------------------*/
BOOLEAN is_valid_pattern (char *p, int *error_type)
{
    /* init error_type */
    *error_type = PATTERN_VALID;

    /* loop through pattern to EOS */
    while (*p) {

        /* determine pattern type */
        switch (*p) {

            /* the [..] construct must be well formed */
            case '[':
                p++;

                /* a leading '!' or '^' only negates the set; skip it so
                 * that [!] and [^] are seen as empty, as matche() does */
                if ((*p == '!') || (*p == '^'))
                    p++;

                /* if the next character is ']' then bad pattern */
                if (*p == ']') {
                    *error_type = PATTERN_EMPTY;
                    return FALSE;
                }

                /* if end of pattern here then bad pattern */
                if (!*p) {
                    *error_type = PATTERN_CLOSE;
                    return FALSE;
                }

                /* loop to end of [..] construct; *p is never NUL at the
                 * top of this loop, so stepping forward stays in bounds */
                while (*p != ']') {

                    /* check for literal escape */
                    if (*p == '\\') {
                        p++;

                        /* if end of pattern here then bad pattern */
                        if (!*p++) {
                            *error_type = PATTERN_ESC;
                            return FALSE;
                        }
                    } else
                        p++;

                    /* if end of pattern here then bad pattern */
                    if (!*p) {
                        *error_type = PATTERN_CLOSE;
                        return FALSE;
                    }

                    /* if this a range */
                    if (*p == '-') {

                        /* we must have an end of range */
                        if (!*++p || (*p == ']')) {
                            *error_type = PATTERN_RANGE;
                            return FALSE;
                        }

                        /* check for literal escape */
                        if (*p == '\\')
                            p++;

                        /* if end of pattern here then bad pattern */
                        if (!*p++) {
                            *error_type = PATTERN_ESC;
                            return FALSE;
                        }

                        /* the pattern may end right after the range
                         * end (ie [a-b ): report it here rather than
                         * stepping past the terminator */
                        if (!*p) {
                            *error_type = PATTERN_CLOSE;
                            return FALSE;
                        }
                    }
                }
                /* p is left on the closing ']', which the next pass of
                 * the outer loop consumes as a normal character */
                break;

            /* all other characters are valid pattern elements */
            case '*':
            case '?':
            default:
                p++;    /* "normal" character */
                break;
        }   /* switch */
    }

    return TRUE;
}
/*
* This test main expects as first arg the pattern and as second arg
* the match string. Output is yay or nay on match. If nay on
* match then the error code is parsed and written.
*/
#include <stdio.h>
int main(int argc, char *argv[])
{
int error;
int is_valid_error;
if (argc != 3)
printf("Usage: MATCH Pattern Text\n");
else {
printf("Pattern: %s\n", argv[1]);
printf("Text : %s\n", argv[2]);
if (!is_pattern(argv[1]))
printf(" First Argument Is Not A Pattern\n");
else {
match(argv[1],argv[2]) ? printf("TRUE") : printf("FALSE");
error = matche(argv[1],argv[2]);
is_valid_pattern(argv[1],&is_valid_error);
switch (error) {
case MATCH_VALID:
printf(" Match Successful");
if (is_valid_error != PATTERN_VALID)
printf(" -- is_valid_pattern() is complaining\n");
else
printf("\n");
break;
case MATCH_RANGE:
printf(" Match Failed on [..]\n");
break;
case MATCH_ABORT:
printf(" Match Failed on Early Text Termination\n");
break;
case MATCH_END:
printf(" Match Failed on Early Pattern Termination\n");
break;
case MATCH_PATTERN:
switch (is_valid_error) {
case PATTERN_VALID:
printf(" Internal Disagreement On Pattern\n");
break;
case PATTERN_RANGE:
printf(" No End of Range in [..] Construct\n");
break;
case PATTERN_CLOSE:
printf(" [..] Construct is Open\n");
break;
case PATTERN_EMPTY:
printf(" [..] Construct is Empty\n");
break;
default:
printf(" Internal Error in is_valid_pattern()\n");
}
break;
default:
printf(" Internal Error in matche()\n");
break;
} /* switch */
}
}
return(0);
}
#endif /* TEST_MATCH */